ggplot2 is based on the grammar of graphics, the idea that you can build every graph from the same few components: a data set, a set of geoms—visual marks that represent data points, and a coordinate system
# install.packages('ggplot2')
library(ggplot2)
library(readr)
# Load the three NBA CSV files used throughout this walkthrough.
# Player table with the POSITION, HEIGHT and WEIGHT columns plotted below.
nba_position <- read_csv(
  file = "~/Desktop/Microsoft/DataVisualization/nba_player_position.csv"
)
# Per-season statistics, one row per player / season / team.
nba_players <- read_csv(
  file = "~/Desktop/Microsoft/DataVisualization/nba_raw_players.csv"
)
# Lookup table whose Name and id columns are used to find a player's id.
nba_id_players <- read_csv(
  file = "~/Desktop/Microsoft/DataVisualization/nba_id_players.csv"
)
ggplot()會製造出一張空白畫布,供我們疊加更多圖層。
# Data and x/y mappings only: no geom layer has been added yet.
ggplot(data = nba_position, mapping = aes(x = HEIGHT, y = WEIGHT))
# Collapse the reversed hybrid-position labels into a single spelling each,
# so "Guard-Forward"/"Forward-Guard" and "Forward-Center"/"Center-Forward"
# are counted as one category.
nba_position$POSITION[nba_position$POSITION %in% "Guard-Forward"] <-
  "Forward-Guard"
nba_position$POSITION[nba_position$POSITION %in% "Forward-Center"] <-
  "Center-Forward"
選出 LeBron James 的生涯數據資料:
library(dplyr)
# Look up LeBron James' id by name, then keep only his rows of season stats.
LeBron_ID <- nba_id_players$id[nba_id_players$Name %in% "LeBron James"]
LeBron <- nba_players %>%
  filter(PLAYER_ID == LeBron_ID)
show(LeBron)
## # A tibble: 16 x 27
## PLAYER_ID SEASON_ID LEAGUE_ID TEAM_ID TEAM_ABBREVIATI… PLAYER_AGE GP GS
## <dbl> <chr> <dbl> <dbl> <chr> <dbl> <dbl> <dbl>
## 1 2544 2003-04 0 1.61e9 CLE 19 79 79
## 2 2544 2004-05 0 1.61e9 CLE 20 80 80
## 3 2544 2005-06 0 1.61e9 CLE 21 79 79
## 4 2544 2006-07 0 1.61e9 CLE 22 78 78
## 5 2544 2007-08 0 1.61e9 CLE 23 75 74
## 6 2544 2008-09 0 1.61e9 CLE 24 81 81
## 7 2544 2009-10 0 1.61e9 CLE 25 76 76
## 8 2544 2010-11 0 1.61e9 MIA 26 79 79
## 9 2544 2011-12 0 1.61e9 MIA 27 62 62
## 10 2544 2012-13 0 1.61e9 MIA 28 76 76
## 11 2544 2013-14 0 1.61e9 MIA 29 77 77
## 12 2544 2014-15 0 1.61e9 CLE 30 69 69
## 13 2544 2015-16 0 1.61e9 CLE 31 76 76
## 14 2544 2016-17 0 1.61e9 CLE 32 74 74
## 15 2544 2017-18 0 1.61e9 CLE 33 82 82
## 16 2544 2018-19 0 1.61e9 LAL 34 55 55
## # … with 19 more variables: MIN <dbl>, FGM <dbl>, FGA <dbl>, FG_PCT <dbl>,
## # FG3M <dbl>, FG3A <dbl>, FG3_PCT <dbl>, FTM <dbl>, FTA <dbl>, FT_PCT <dbl>,
## # OREB <dbl>, DREB <dbl>, REB <dbl>, AST <dbl>, STL <dbl>, BLK <dbl>,
## # TOV <dbl>, PF <dbl>, PTS <dbl>
選出 Dwyane Wade 的生涯數據資料:
# Look up Dwyane Wade's id by name, then keep his season rows while dropping
# the rows whose team abbreviation is "TOT".
Wade_ID <- nba_id_players$id[nba_id_players$Name %in% "Dwyane Wade"]
Dwyane <- nba_players %>%
  filter(PLAYER_ID == Wade_ID, TEAM_ABBREVIATION != "TOT")
show(Dwyane)
## # A tibble: 17 x 27
## PLAYER_ID SEASON_ID LEAGUE_ID TEAM_ID TEAM_ABBREVIATI… PLAYER_AGE GP GS
## <dbl> <chr> <dbl> <dbl> <chr> <dbl> <dbl> <dbl>
## 1 2548 2003-04 0 1.61e9 MIA 22 61 56
## 2 2548 2004-05 0 1.61e9 MIA 23 77 77
## 3 2548 2005-06 0 1.61e9 MIA 24 75 75
## 4 2548 2006-07 0 1.61e9 MIA 25 51 50
## 5 2548 2007-08 0 1.61e9 MIA 26 51 49
## 6 2548 2008-09 0 1.61e9 MIA 27 79 79
## 7 2548 2009-10 0 1.61e9 MIA 28 77 77
## 8 2548 2010-11 0 1.61e9 MIA 29 76 76
## 9 2548 2011-12 0 1.61e9 MIA 30 49 49
## 10 2548 2012-13 0 1.61e9 MIA 31 69 69
## 11 2548 2013-14 0 1.61e9 MIA 32 54 53
## 12 2548 2014-15 0 1.61e9 MIA 33 62 62
## 13 2548 2015-16 0 1.61e9 MIA 34 74 73
## 14 2548 2016-17 0 1.61e9 CHI 35 60 59
## 15 2548 2017-18 0 1.61e9 CLE 36 46 3
## 16 2548 2017-18 0 1.61e9 MIA 36 21 0
## 17 2548 2018-19 0 1.61e9 MIA 37 72 2
## # … with 19 more variables: MIN <dbl>, FGM <dbl>, FGA <dbl>, FG_PCT <dbl>,
## # FG3M <dbl>, FG3A <dbl>, FG3_PCT <dbl>, FTM <dbl>, FTA <dbl>, FT_PCT <dbl>,
## # OREB <dbl>, DREB <dbl>, REB <dbl>, AST <dbl>, STL <dbl>, BLK <dbl>,
## # TOV <dbl>, PF <dbl>, PTS <dbl>
將賽季切分成兩個年份,以賽季開始年份為主:
library(tidyr)
# Split SEASON_ID (e.g. "2003-04") into two character columns: YEAR holds the
# season's starting year and REST holds the remainder.
LeBron <- separate(LeBron, col = SEASON_ID, into = c("YEAR", "REST"))
Dwyane <- separate(Dwyane, col = SEASON_ID, into = c("YEAR", "REST"))
ggplot2 當中用來美化視覺化結果的 mapping,例如設定顏色、形狀、透明度、大小等等,更多用法可見 Aesthetic specifications.
# Plain scatter plot of height against weight, one point per row.
ggplot(data = nba_position, mapping = aes(x = HEIGHT, y = WEIGHT)) +
  geom_point()
# Height vs. weight with POSITION mapped to both colour and point shape.
# The transparency is a fixed setting, so it is passed to the geom instead of
# aes(): inside aes() the constant 0.3 is treated as a data value, is run
# through the alpha scale, and adds a meaningless "alpha" legend entry.
# (size can be mapped in aes() or set on the geom in the same way.)
ggplot(
  nba_position,
  aes(HEIGHT, WEIGHT, color = POSITION, shape = POSITION)
) +
  geom_point(alpha = 0.3)
Use a geom to represent data points, use the geom’s aesthetic properties to represent variables. Each function returns a layer.
# LeBron's PTS per season; YEAR is stored as text, so convert it for the x axis.
ggplot(data = LeBron, mapping = aes(x = as.numeric(YEAR), y = PTS)) +
  geom_line()
可以利用geom_line、geom_ribbon等來使得視覺化結果更能傳達意思,同時使用labs()來更改主標題、x/y軸標題。
# LeBron's PTS by season start year. The dashed red line marks 2000 points and
# the grey ribbon fills the area between that reference level and the PTS curve.
ggplot(data = LeBron, mapping = aes(x = as.numeric(YEAR), y = PTS)) +
  geom_line(colour = "blue") +
  geom_hline(yintercept = 2000, colour = "red", linetype = "dashed") +
  geom_ribbon(mapping = aes(ymin = 2000, ymax = PTS), fill = "grey") +
  labs(
    title = "LeBron James' overall Points",
    x = "Season",
    y = "Points"
  )
# Same coloured/shaped scatter as before, but with jittered positions so that
# overlapping points are spread apart.
# alpha is a fixed setting and therefore goes on the geom: inside aes() the
# constant would be treated as data, rescaled by the alpha scale, and shown as
# a spurious legend entry.
# (size can be mapped in aes() or set on the geom in the same way.)
ggplot(
  nba_position,
  aes(HEIGHT, WEIGHT, color = POSITION, shape = POSITION)
) +
  geom_point(position = "jitter", alpha = 0.3)
可以透過geom_point(position = 'jitter')或是geom_jitter()來讓資料點更加分散,達到較好的呈現結果。
# Jittered scatter on a random sample of 500 rows to reduce overplotting.
# alpha is a fixed setting and therefore goes on the geom: inside aes() the
# constant 0.8 would be treated as data, rescaled by the alpha scale, and shown
# as a spurious legend entry.
# (size can be mapped in aes() or set on the geom in the same way.)
ggplot(
  sample_n(nba_position, 500),
  aes(HEIGHT, WEIGHT, color = POSITION, shape = POSITION)
) +
  geom_jitter(alpha = 0.8)
可以使用guides()來選擇圖例所要顯示的內容,或是是否要顯示顏色、形狀等:
# Demonstrates guides(): three aesthetics are mapped, but only the shape legend
# is kept.
# NOTE: the constants "blue" and 0.3 sit inside aes() on purpose here so that
# colour and alpha legends exist to be hidden. Because they are mapped rather
# than set, they are treated as data values: the points are NOT drawn in blue
# or at 30% opacity. To actually fix the appearance, pass
# color = "blue", alpha = 0.3 to geom_point() instead.
# (size can be mapped in aes() or set on the geom in the same way.)
ggplot(
  nba_position,
  aes(HEIGHT, WEIGHT, color = "blue", alpha = 0.3, shape = POSITION)
) +
  geom_point(position = "jitter") +
  guides(
    colour = "none",   # hide the colour legend
    shape = "legend",  # keep the shape legend
    alpha = "none"     # hide the alpha legend ("none" replaces deprecated FALSE)
  )
使用1單位身高作為每個bin的涵蓋區間:
# Histogram of player heights; each bin spans one unit of HEIGHT.
ggplot(data = nba_position, mapping = aes(x = HEIGHT)) +
  geom_histogram(binwidth = 1)
將不同位置分開,繪製histogram結合密度圖density plot以及標註平均值,可以看出不同位置之間的分布差異:
# Mean HEIGHT per POSITION, used for the dashed vertical reference lines.
# plyr is called through its namespace instead of being attached: attaching
# plyr after dplyr masks dplyr verbs such as summarise(), mutate() and
# arrange() for the rest of the session.
mu <- plyr::ddply(
  nba_position, "POSITION", plyr::summarise,
  grp.mean = mean(HEIGHT)
)
# Per-position histograms on the density scale, overlaid with density curves
# and each position's mean height.
# after_stat(density) replaces the deprecated ..density.. notation.
ggplot(nba_position, aes(x = HEIGHT, color = POSITION)) +
  geom_histogram(
    aes(y = after_stat(density)),
    position = "identity", alpha = 0.5, binwidth = 1
  ) +
  geom_density(alpha = 0.6) +
  geom_vline(
    data = mu, aes(xintercept = grp.mean, color = POSITION),
    linetype = "dashed"
  )
繪製 LeBron 歷年的比賽場數資料,並依照所屬球隊上色:
# LeBron's GP per season, bars filled by the team he played for.
ggplot(
  data = LeBron,
  mapping = aes(x = as.numeric(YEAR), y = GP, fill = TEAM_ABBREVIATION)
) +
  geom_bar(stat = "identity") +  # bar heights come straight from GP
  xlab("SEASON")
繪製 Dwyane 歷年的比賽場數資料,並依照所屬球隊上色:
# Dwyane's GP per season, filled by team. Bars are dodged so that a season
# with more than one team row (e.g. 2017-18) shows side-by-side bars.
ggplot(
  data = Dwyane,
  mapping = aes(x = as.numeric(YEAR), y = GP, fill = TEAM_ABBREVIATION)
) +
  geom_bar(stat = "identity", position = "dodge") +
  xlab("SEASON")
繪製不同位置的球員數量長條圖:
# Count the players in each position; naming the table dimension and the
# response column yields the Position / Count columns directly.
Position_cnt <- as.data.frame(
  table(Position = nba_position$POSITION),
  responseName = "Count"
)
# One bar per position, with the height taken from the precomputed Count.
ggplot(
  data = Position_cnt,
  mapping = aes(x = Position, y = Count, fill = Position)
) +
  geom_bar(stat = "identity")
# Distribution of WEIGHT within each POSITION as box plots, outlined by position.
ggplot(
  data = nba_position,
  mapping = aes(x = POSITION, y = WEIGHT, colour = POSITION)
) +
  geom_boxplot()
使用 violin plot 也有類似效果:
# Same comparison as violins; scale = "area" gives every violin the same area.
ggplot(
  data = nba_position,
  mapping = aes(x = POSITION, y = WEIGHT, colour = POSITION)
) +
  geom_violin(scale = "area")
可以設置圖中 line, rect, text, title, aspect.ratio, axis, legend, panel, plot, strip…etc.的樣式,舉例來說設定圖標題置中:
# LeBron PTS plot again, this time using theme() to centre the plot title.
ggplot(data = LeBron, mapping = aes(x = as.numeric(YEAR), y = PTS)) +
  geom_line(colour = "blue") +
  geom_hline(yintercept = 2000, colour = "red", linetype = "dashed") +
  geom_ribbon(mapping = aes(ymin = 2000, ymax = PTS), fill = "grey") +
  labs(
    title = "LeBron James' overall Points",
    x = "Season",
    y = "Points"
  ) +
  theme(plot.title = element_text(hjust = 0.5))  # 0.5 = horizontally centred
或是將主題換成classic:
# LeBron PTS plot again, with the complete "classic" theme applied.
ggplot(data = LeBron, mapping = aes(x = as.numeric(YEAR), y = PTS)) +
  geom_line(colour = "blue") +
  geom_hline(yintercept = 2000, colour = "red", linetype = "dashed") +
  geom_ribbon(mapping = aes(ymin = 2000, ymax = PTS), fill = "grey") +
  labs(
    title = "LeBron James' overall Points",
    x = "Season",
    y = "Points"
  ) +
  theme_classic()
Some plots visualize a transformation of the original data set. Use a stat to choose a common transformation to visualize. 意即繪圖前是否需要進行統計上的轉換、或是繪製統計圖:
# Jittered height/weight scatter coloured by position, with a smoothed trend
# fitted to all points together (colour is mapped on the point layer only).
ggplot(data = nba_position, mapping = aes(x = HEIGHT, y = WEIGHT)) +
  geom_point(mapping = aes(colour = POSITION), position = "jitter") +
  # geom_smooth() could be used here instead of stat_smooth()
  stat_smooth(method = "auto", formula = y ~ x)
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
# Dwyane's PTS against GP, overlaid with quantile-regression lines.
ggplot(data = Dwyane, mapping = aes(x = GP, y = PTS)) +
  geom_point() +
  stat_quantile()
## Smoothing formula not specified. Using: y ~ x
可以去更改資料對應到aes時的對應方式,如將顏色改成連續的漸層色:
# Dwyane's PTS against GP, with PF shown on a continuous colour gradient.
ggplot(data = Dwyane, mapping = aes(x = GP, y = PTS, colour = PF)) +
  geom_point() +
  scale_color_distiller()
或是specify要呈現在圖上的資料類別、對應的顏色、還有呈現在圖例上的名稱等:
# Jittered scatter of a random 500-row sample, using a hand-specified colour
# scale. Rows whose POSITION is not listed in `limits` get no colour and are
# dropped with a warning.
# (alpha, shape and size could be mapped in aes() in the same way as colour.)
ggplot(
  data = sample_n(nba_position, 500),
  mapping = aes(x = HEIGHT, y = WEIGHT, colour = POSITION)
) +
  geom_jitter() +
  scale_color_manual(
    # Legend (guide) settings
    name = "Position",                        # legend title
    breaks = c("Center", "Forward", "Guard"), # raw values shown in the legend
    labels = c("C", "F", "G"),                # labels shown for those values
    # Data-to-colour mapping
    limits = c("Center", "Forward", "Guard"), # raw values allowed on the scale
    values = c("blue", "red", "green")        # colour used for each raw value
  )
## Warning: Removed 38 rows containing missing values (geom_point).
可以更改視覺化結果的座標系統,以達到更好的視覺呈現:
# Position counts drawn in polar coordinates: the x axis (Position) becomes
# the angle, running clockwise.
ggplot(
  data = Position_cnt,
  mapping = aes(x = Position, y = Count, fill = Position)
) +
  geom_bar(stat = "identity") +
  coord_polar(theta = "x", direction = 1)
# Position counts as horizontal bars: coord_flip() swaps the x and y axes.
ggplot(
  data = Position_cnt,
  mapping = aes(x = Position, y = Count, fill = Position)
) +
  geom_bar(stat = "identity") +
  coord_flip()
如果想要將圖片切成子圖時,可以使用facet_grid()來處理切割的依據:
# Jittered height/weight scatter split into one panel row per POSITION.
ggplot(
  data = nba_position,
  mapping = aes(x = HEIGHT, y = WEIGHT, colour = POSITION)
) +
  geom_point(position = "jitter") +
  facet_grid(POSITION ~ .)  # rows by POSITION, no column faceting